* Start from a clean session: close any log left open by an earlier run
* (capture ignores the error raised when no log is open), empty memory,
* and turn off output paging so the do-file runs without pausing.
capture log close
clear all
set more off

****************************************************************************
* This do-file extracts data from the read-only LAD files
*	- age, sex, family type and business income
* 
* Creates the following Stata files:
*	- demographic_yyyy.dta (one file per year)
* 	- demographic_8299.dta (appended files)
*   - demographic_0016.dta (appended files)
* 
****************************************************************************

* Run the shared directory set-up do-file (presumably defines the dir_log and
* dir_data globals referenced below -- confirm in 0_Set_Directories.do).
do "H:\Lavecchia_7086\to-transfer-jan-2022\RESTAT_Replication_Programs\0_Set_Directories.do"

* Open the log for this program, overwriting any previous copy.
* NOTE: because of capture, a failure to open the log is silent and the
* do-file continues without logging.
capture log using "$dir_log\1c_Extract_Demographics.log", replace

* Step 1: load the data from LAD read-only files

* For years 1982-1985, the census division variable is cd81_i
forvalues year = 1982/1985 {
    * Stems of the year-suffixed variables to extract
    local stems age__i cd81_i fcmp_i fsizei psco_i sxco_i tnkidi tnk19i iemcop

    * Build the list of year-suffixed names to read from the LAD file
    local suffixed
    foreach stem of local stems {
        local suffixed `suffixed' `stem'`year'
    }

    * Load only the identifier, the selected variables and the weight
    use lin__i `suffixed' wgt2_i using "G:\LAD\LAD_DAL_Allcycle_v3\data\data\Stata\lad`year'.dta"
    sort lin__i
    generate year = `year'

    * Drop the year suffix so variable names match across years
    foreach stem of local stems {
        rename `stem'`year' `stem'
    }

    * Give the census division variable one name regardless of vintage
    rename cd81_i cd_i

    save "$dir_data\Capital income years\demographic_`year'.dta", replace
}

* Census division variable name changes over time 

* For years 1986-1990, cd86_i`year' 
forvalues year = 1986/1990 {
    * Stems of the year-suffixed variables to extract
    local stems age__i cd86_i fcmp_i fsizei psco_i sxco_i tnkidi tnk19i iemcop

    * Build the list of year-suffixed names to read from the LAD file
    local suffixed
    foreach stem of local stems {
        local suffixed `suffixed' `stem'`year'
    }

    * Load only the identifier, the selected variables and the weight
    use lin__i `suffixed' wgt2_i using "G:\LAD\LAD_DAL_Allcycle_v3\data\data\Stata\lad`year'.dta"
    sort lin__i
    generate year = `year'

    * Drop the year suffix so variable names match across years
    foreach stem of local stems {
        rename `stem'`year' `stem'
    }

    * Give the census division variable one name regardless of vintage
    rename cd86_i cd_i

    save "$dir_data\Capital income years\demographic_`year'.dta", replace
}
* For years 1991-1995, cd91_i`year'
forvalues year = 1991/1995 {
    * Stems of the year-suffixed variables to extract
    local stems age__i cd91_i fcmp_i fsizei psco_i sxco_i tnkidi tnk19i iemcop

    * Build the list of year-suffixed names to read from the LAD file
    local suffixed
    foreach stem of local stems {
        local suffixed `suffixed' `stem'`year'
    }

    * Load only the identifier, the selected variables and the weight
    use lin__i `suffixed' wgt2_i using "G:\LAD\LAD_DAL_Allcycle_v3\data\data\Stata\lad`year'.dta"
    sort lin__i
    generate year = `year'

    * Drop the year suffix so variable names match across years
    foreach stem of local stems {
        rename `stem'`year' `stem'
    }

    * Give the census division variable one name regardless of vintage
    rename cd91_i cd_i

    save "$dir_data\Capital income years\demographic_`year'.dta", replace
}
* For years 1996-2000, cd96_i`year'
forvalues year = 1996/2000 {
    * Stems of the year-suffixed variables to extract
    local stems age__i cd96_i fcmp_i fsizei psco_i sxco_i tnkidi tnk19i iemcop

    * Build the list of year-suffixed names to read from the LAD file
    local suffixed
    foreach stem of local stems {
        local suffixed `suffixed' `stem'`year'
    }

    * Load only the identifier, the selected variables and the weight
    use lin__i `suffixed' wgt2_i using "G:\LAD\LAD_DAL_Allcycle_v3\data\data\Stata\lad`year'.dta"
    sort lin__i
    generate year = `year'

    * Drop the year suffix so variable names match across years
    foreach stem of local stems {
        rename `stem'`year' `stem'
    }

    * Give the census division variable one name regardless of vintage
    rename cd96_i cd_i

    save "$dir_data\Capital income years\demographic_`year'.dta", replace
}
* For years 2001-2005, cd01_i`year'
forvalues year = 2001/2005 {
    * Stems of the year-suffixed variables to extract
    local stems age__i cd01_i fcmp_i fsizei psco_i sxco_i tnkidi tnk19i iemcop

    * Build the list of year-suffixed names to read from the LAD file
    local suffixed
    foreach stem of local stems {
        local suffixed `suffixed' `stem'`year'
    }

    * Load only the identifier, the selected variables and the weight
    use lin__i `suffixed' wgt2_i using "G:\LAD\LAD_DAL_Allcycle_v3\data\data\Stata\lad`year'.dta"
    sort lin__i
    generate year = `year'

    * Drop the year suffix so variable names match across years
    foreach stem of local stems {
        rename `stem'`year' `stem'
    }

    * Give the census division variable one name regardless of vintage
    rename cd01_i cd_i

    save "$dir_data\Capital income years\demographic_`year'.dta", replace
}

* For years 2006-2010, cd06_i`year'
forvalues year = 2006/2010 {
    * Stems of the year-suffixed variables to extract
    local stems age__i cd06_i fcmp_i fsizei psco_i sxco_i tnkidi tnk19i iemcop

    * Build the list of year-suffixed names to read from the LAD file
    local suffixed
    foreach stem of local stems {
        local suffixed `suffixed' `stem'`year'
    }

    * Load only the identifier, the selected variables and the weight
    use lin__i `suffixed' wgt2_i using "G:\LAD\LAD_DAL_Allcycle_v3\data\data\Stata\lad`year'.dta"
    sort lin__i
    generate year = `year'

    * Drop the year suffix so variable names match across years
    foreach stem of local stems {
        rename `stem'`year' `stem'
    }

    * Give the census division variable one name regardless of vintage
    rename cd06_i cd_i

    save "$dir_data\Capital income years\demographic_`year'.dta", replace
}
* For years 2011-2015, cd11_i`year'
forvalues year = 2011/2015 {
    * Stems of the year-suffixed variables to extract
    local stems age__i cd11_i fcmp_i fsizei psco_i sxco_i tnkidi tnk19i iemcop

    * Build the list of year-suffixed names to read from the LAD file
    local suffixed
    foreach stem of local stems {
        local suffixed `suffixed' `stem'`year'
    }

    * Load only the identifier, the selected variables and the weight
    use lin__i `suffixed' wgt2_i using "G:\LAD\LAD_DAL_Allcycle_v3\data\data\Stata\lad`year'.dta"
    sort lin__i
    generate year = `year'

    * Drop the year suffix so variable names match across years
    foreach stem of local stems {
        rename `stem'`year' `stem'
    }

    * Give the census division variable one name regardless of vintage
    rename cd11_i cd_i

    save "$dir_data\Capital income years\demographic_`year'.dta", replace
}

* For year 2016, cd16_i`year'
forvalues year = 2016/2016 {
    * Stems of the year-suffixed variables to extract
    local stems age__i cd16_i fcmp_i fsizei psco_i sxco_i tnkidi tnk19i iemcop

    * Build the list of year-suffixed names to read from the LAD file
    local suffixed
    foreach stem of local stems {
        local suffixed `suffixed' `stem'`year'
    }

    * Load only the identifier, the selected variables and the weight
    use lin__i `suffixed' wgt2_i using "G:\LAD\LAD_DAL_Allcycle_v3\data\data\Stata\lad`year'.dta"
    sort lin__i
    generate year = `year'

    * Drop the year suffix so variable names match across years
    foreach stem of local stems {
        rename `stem'`year' `stem'
    }

    * Give the census division variable one name regardless of vintage
    rename cd16_i cd_i

    save "$dir_data\Capital income years\demographic_`year'.dta", replace
}


* Step 2: append the yearly files into two pooled files, 1982-1999 and 2000-2016 (the variables were already renamed in Step 1 to drop the year portion of the name)

**************** 1982 -- 1999 ****************
* Start from the 1982 file, replacing whatever is in memory
use "$dir_data\Capital income years\demographic_1982.dta", clear

* Stack each later year through 1999 onto the data in memory
forvalues year = 1983(1)1999 {
    append using "$dir_data\Capital income years\demographic_`year'.dta"
    * Recompress after every append to keep storage types as small as possible
    compress
}

save "$dir_data\demographic_8299.dta", replace


**************** 2000 -- 2016 ****************
* Start from the 2000 file, replacing whatever is in memory
use "$dir_data\Capital income years\demographic_2000.dta", clear

* Stack each later year through 2016 onto the data in memory
forvalues year = 2001(1)2016 {
    append using "$dir_data\Capital income years\demographic_`year'.dta"
    * Recompress after every append to keep storage types as small as possible
    compress
}

save "$dir_data\demographic_0016.dta", replace
